Time Until Large Events#
import pandas as pd
import numpy as np
import datetime as dt
# Load the simulated (ETAS) and observed (USGS) earthquake catalogs.
csv_file = "../datasets/Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep=',', lineterminator='\n')

csv_file = "../datasets/All (1960-2023).csv"
# low_memory=False reads the whole file in one pass so mixed-type columns are
# inferred consistently — silences the DtypeWarning shown in the cell output.
usgs = pd.read_csv(csv_file, sep=',', lineterminator='\n',
                   dtype={'time': str}, low_memory=False)
Show code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_11588\2239509985.py:4: DtypeWarning: Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
Data Filtering#
Converting the date columns to datetime
Date > 1960-01-01 and < 2023-01-01
Longitude > -123 and < -113
Latitude > 29 and < 39
Show code cell source
# Parse ETAS dates; the two-digit-year format can roll into the future
# (e.g. '60' -> 2060), so any parsed year beyond the current one is pulled
# back a century.
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)

# Keep events strictly inside the study window and region:
# 1960-01-01 < Date < 2023-01-01, -123 < X < -113, 29 < Y < 39.
date_ok = (etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))
lon_ok = (etas['X'] > -123) & (etas['X'] < -113)
lat_ok = (etas['Y'] > 29) & (etas['Y'] < 39)
etas = etas[date_ok & lon_ok & lat_ok]
etas.head()
| Date | Time | Year | X | Y | Magnitude | Z\r | |
|---|---|---|---|---|---|---|---|
| 1 | 1960-01-02 | 0:08:49.00 | 1960.006125 | -115.6222 | 33.0793 | 4.25 | 7.9322 |
| 2 | 1960-01-02 | 0:10:31.00 | 1960.007305 | -115.6323 | 33.1220 | 3.03 | 8.4015 |
| 3 | 1960-01-02 | 0:10:32.00 | 1960.007320 | -115.5851 | 33.0745 | 3.03 | 7.9678 |
| 4 | 1960-01-02 | 0:11:07.00 | 1960.007720 | -115.6256 | 33.0290 | 3.08 | 7.9737 |
| 5 | 1960-01-02 | 0:11:17.00 | 1960.007840 | -115.6050 | 33.0276 | 3.61 | 7.9322 |
Show code cell source
# Normalise USGS timestamps to calendar dates: parse the raw 'time' strings,
# format as YYYY-MM-DD (drops time-of-day and any timezone), then convert the
# result back to datetime ONCE.  The original re-parsed the string column in
# every comparison and again row-by-row in a Python loop (L46-L49); a single
# vectorised pd.to_datetime produces the identical column.
usgs["Date"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs.drop(columns=["time"], inplace=True)
usgs['Date'] = pd.to_datetime(usgs['Date'])
usgs = usgs[(usgs['Date'] > pd.to_datetime('1960-01-01')) & (usgs['Date'] < pd.to_datetime('2023-01-01'))]

# Coordinates and magnitude arrive as mixed-type strings; coerce to numeric
# (unparseable entries become NaN and drop out of the range filters below).
usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

# Restrict to the study region: -123 < longitude < -113, 29 < latitude < 39.
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]
usgs.head()
| latitude | longitude | depth | mag | magType | nst | gap | dmin | rms | net | ... | place | type | horizontalError | depthError | magError | magNst | status | locationSource | magSource\r | Date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 240 | 33.397500 | -116.393333 | 3.88 | 4.14 | mw | 132 | 16 | 0.07391 | 0.19 | ci | ... | 16 km N of Borrego Springs, CA | earthquake | 0.1 | 0.38 | NaN | 6 | reviewed | ci | ci\r | 2022-12-31 |
| 241 | 34.355667 | -116.921833 | 4.73 | 3.47 | mw | 121 | 25 | 0.07845 | 0.15 | ci | ... | 11km SSE of Lucerne Valley, CA | earthquake | 0.09 | 0.41 | NaN | 4 | reviewed | ci | ci\r | 2022-12-31 |
| 246 | 37.620167 | -122.025000 | 3.82 | 3.34 | mw | 141 | 16 | NaN | 0.16 | nc | ... | 3km N of Union City, CA | earthquake | 0.1 | 0.17 | NaN | 3 | reviewed | nc | nc\r | 2022-12-22 |
| 262 | 37.918167 | -122.304000 | 5.48 | 3.57 | mw | 170 | 19 | 0.01598 | 0.15 | nc | ... | 1km ENE of El Cerrito, CA | earthquake | 0.1 | 0.17 | NaN | 4 | reviewed | nc | nc\r | 2022-12-17 |
| 263 | 36.604667 | -121.209333 | 8.88 | 3.28 | ml | 67 | 55 | 0.03812 | 0.09 | nc | ... | 10km NW of Pinnacles, CA | earthquake | 0.14 | 0.28 | 0.129 | 72 | reviewed | nc | nc\r | 2022-12-13 |
5 rows × 22 columns
Data Grouping And Merging#
The data is grouped into one-day chunks, keeping the maximum magnitude within each chunk.
# Group ETAS events into one-day bins and keep each day's maximum magnitude.
# The original grouped with to_period('S') (one-second periods); since the
# parsed dates carry no time-of-day the grouping result is the same, but 'D'
# matches the stated intent of "1 day chunks".
max_mag_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('D')).Magnitude.max())
max_mag_etas.reset_index(inplace=True)
max_mag_etas.head()
| Date | Magnitude | |
|---|---|---|
| 0 | 1960-01-02 00:00:00 | 4.25 |
| 1 | 1960-01-03 00:00:00 | 3.90 |
| 2 | 1960-01-04 00:00:00 | 4.24 |
| 3 | 1960-01-05 00:00:00 | 3.40 |
| 4 | 1960-01-06 00:00:00 | 3.47 |
# Group USGS events into one-day bins and keep each day's maximum magnitude.
# 'D' (daily periods) replaces the original 'S' (per-second); the dates have
# no time component, so the grouping is unchanged but now matches the
# documented "1 day chunks".
max_mag_usgs = pd.DataFrame(usgs.groupby(usgs['Date'].dt.to_period('D')).mag.max())
max_mag_usgs.reset_index(inplace=True)
max_mag_usgs.head()
| Date | mag | |
|---|---|---|
| 0 | 1960-01-02 00:00:00 | 4.04 |
| 1 | 1960-01-05 00:00:00 | 3.03 |
| 2 | 1960-01-07 00:00:00 | 3.64 |
| 3 | 1960-01-08 00:00:00 | 3.10 |
| 4 | 1960-01-11 00:00:00 | 3.79 |
# Magnitude threshold above which a day counts as a "large event" day.
large_earthquake = 6.5
Large Events#
A label is added to Large Event data
# Label each ETAS day as a large-event day (max magnitude above threshold)
# and, for those days, compute the gap in days since the previous large event.
large_mag_etas = max_mag_etas.copy()
large_mag_etas["Large Event"] = (large_mag_etas["Magnitude"] > large_earthquake).astype(int)
large_mag_etas["Date"] = large_mag_etas["Date"].dt.to_timestamp()

# diff() over the large-event rows only; the first large event has no
# predecessor and is left NaN automatically.  (The original also re-copied the
# frame and force-set row 0 to NA — both were no-ops and are removed.)
condition = large_mag_etas['Large Event'] == 1
large_mag_etas.loc[condition, 'time_diff'] = large_mag_etas.loc[condition, 'Date'].diff().dt.days
# Label each USGS day as a large-event day (max magnitude above threshold)
# and, for those days, compute the gap in days since the previous large event.
large_mag_usgs = max_mag_usgs.copy()
large_mag_usgs["Large Event"] = (large_mag_usgs["mag"] > large_earthquake).astype(int)
large_mag_usgs["Date"] = large_mag_usgs["Date"].dt.to_timestamp()

# diff() over the large-event rows only; the first large event stays NaN.
# (The redundant self-copy and the no-op `loc[0] = pd.NA` are removed.)
condition = large_mag_usgs['Large Event'] == 1
large_mag_usgs.loc[condition, 'time_diff'] = large_mag_usgs.loc[condition, 'Date'].diff().dt.days
large_mag_etas.head()  # preview the labelled ETAS table
| Date | Magnitude | Large Event | time_diff | |
|---|---|---|---|---|
| 0 | 1960-01-02 | 4.25 | 0 | NaN |
| 1 | 1960-01-03 | 3.90 | 0 | NaN |
| 2 | 1960-01-04 | 4.24 | 0 | NaN |
| 3 | 1960-01-05 | 3.40 | 0 | NaN |
| 4 | 1960-01-06 | 3.47 | 0 | NaN |
large_mag_usgs.head()  # preview the labelled USGS table
| Date | mag | Large Event | time_diff | |
|---|---|---|---|---|
| 0 | 1960-01-02 | 4.04 | 0 | NaN |
| 1 | 1960-01-05 | 3.03 | 0 | NaN |
| 2 | 1960-01-07 | 3.64 | 0 | NaN |
| 3 | 1960-01-08 | 3.10 | 0 | NaN |
| 4 | 1960-01-11 | 3.79 | 0 | NaN |
Graphing Time Until Large Events#
import plotly.express as px
import plotly.graph_objects as go
Show code cell source
# Scatter the waiting time (days since the previous large event) for both
# catalogues on a shared date axis.
fig = go.Figure()
for frame, colour, label in ((large_mag_etas, 'red', 'ETAS'),
                             (large_mag_usgs, 'blue', 'USGS')):
    fig.add_trace(go.Scatter(
        x=frame['Date'],
        y=frame['time_diff'],
        mode='markers',
        marker=dict(color=colour, line=dict(color='black', width=1)),
        name=label
    ))
fig.update_layout(
    title='Time Between Large Events -ETAS vs USGS',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
    legend=dict(x=0, y=1, traceorder='normal', orientation='h')  # Adjust legend position
)
fig.show()
Calculating Energy#
Converting the magnitudes to energy through the formula: (1/1.5) * log(10^(1.5*mag))
# Convert magnitudes to energy via the stated formula:
#   E = (1/1.5) * log(10**(1.5 * mag))
# BUG FIX: the original computed 10**(1.5*mag) and then immediately
# overwrote it with np.log of the *magnitude* column — the log must be
# applied to the energy value just computed, not to the magnitude.
const = (1 / 1.5)
large_mag_etas['Energy'] = np.log(10 ** (1.5 * large_mag_etas['Magnitude'])) * const
large_mag_usgs['Energy'] = np.log(10 ** (1.5 * large_mag_usgs['mag'])) * const
Show code cell source
fig = go.Figure()
def add_cumulative_energy_plot(dataset, color, name_prefix):
    """Add one line trace per inter-event interval showing cumulative energy.

    For each pair of consecutive large events in `dataset`, plots the running
    sum of the 'Energy' column over the days from one event up to (but not
    including) the next, on the module-level `fig`.
    """
    event_dates = dataset.loc[dataset["Large Event"] == 1, "Date"].tolist()
    # Walk consecutive (start, end) date pairs; the span after the final
    # large event is intentionally not plotted.
    for idx, (start, end) in enumerate(zip(event_dates, event_dates[1:]), start=1):
        window = dataset[(dataset["Date"] >= start) & (dataset["Date"] < end)]
        fig.add_trace(go.Scatter(
            x=window["Date"],
            y=window["Energy"].cumsum(),
            mode='lines',
            name=f'{name_prefix} Event {idx}-{idx + 1}',
            line=dict(color=color, width=2)
        ))
Show code cell source
# Draw cumulative-energy segments for each catalogue on the shared figure.
for frame, colour, label in ((large_mag_usgs, 'green', 'USGS'),
                             (large_mag_etas, 'blue', 'ETAS')):
    add_cumulative_energy_plot(frame, colour, label)
fig.update_layout(
    title='Cumulative Energy Between Large Events USGS vs ETAS',
    xaxis_title='Date',
    yaxis_title='Cumulative Energy',
)
fig.show()
# etas['Large'] = etas['Magnitude'] > large_earthquake
# # Find the indices of large earthquakes
# large_earthquake_indices = etas.index[etas['Large']].tolist()
# # Initialize lists to store the sum of energies
# sum_energies_usgs = []
# # Calculate sum of energies between consecutive large earthquakes
# for i in range(len(large_earthquake_indices) - 1):
# start_index = large_earthquake_indices[i]
# end_index = large_earthquake_indices[i + 1]
# # Extract the relevant subset of data
# subset = etas.loc[start_index:end_index]
# # Calculate the sum of energies in this interval
# interval_energy_sum = subset['Energy'].sum()
# # Append the sum of energies to the list
# sum_energies_usgs.append(interval_energy_sum)
# usgs['Large'] = usgs['mag'] > large_earthquake
# # Find the indices of large earthquakes
# large_earthquake_indices = usgs.index[usgs['Large']].tolist()
# # Initialize lists to store the sum of energies
# sum_energies_etas = []
# # Calculate sum of energies between consecutive large earthquakes
# for i in range(len(large_earthquake_indices) - 1):
# start_index = large_earthquake_indices[i]
# end_index = large_earthquake_indices[i + 1]
# # Extract the relevant subset of data
# subset = usgs.loc[start_index:end_index]
# # Calculate the sum of energies in this interval
# interval_energy_sum = subset['Energy'].sum()
# # Append the sum of energies to the list
# sum_energies_etas.append(interval_energy_sum)
# usgs_energy_sum = {'Large Earthquake Interval Index': range(len(sum_energies_usgs)), 'Sum of Energies In USGS': sum_energies_usgs}
# usgs_energy_sum = pd.DataFrame(usgs_energy_sum)
# etas_energy_sum = {'Large Earthquake Interval Index': range(len(sum_energies_etas)), 'Sum of Energies In ETAS': sum_energies_etas}
# etas_energy_sum = pd.DataFrame(etas_energy_sum)
# fig = go.Figure()
# fig.add_trace(go.Scatter(x=usgs_energy_sum['Large Earthquake Interval Index'], y=usgs_energy_sum['Sum of Energies In USGS'],
# mode='lines+markers', name='Sum of Energies In USGS'))
# fig.add_trace(go.Scatter(x=etas_energy_sum['Large Earthquake Interval Index'], y=etas_energy_sum['Sum of Energies In ETAS'],
# mode='lines+markers', name='Sum of Energies In ETAS', line=dict(color='red')))
# fig.update_layout(
# xaxis=dict(title='Large Earthquake Interval Index'),
# yaxis=dict(title='Sum of Energies'),
# title='Sum of Energies between Large Earthquakes'
# )
# fig.show()